package com.atguigu.app.dws;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.PropertyNamingStrategy;
import com.alibaba.fastjson.serializer.SerializeConfig;
import com.atguigu.bean.UserLoginBean;
import com.atguigu.common.Constant;
import com.atguigu.utils.DateFormatUtil;
import com.atguigu.utils.DorisUtil;
import com.atguigu.utils.KafkaUtil;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.hashmap.HashMapStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.AllWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

import java.time.Duration;

/**
 * DWS-layer Flink job that aggregates, per 10-second tumbling event-time window, the number of
 * distinct users who logged in for the first time on a given day ({@code uuCt}) and the number of
 * those users whose previous login day lies more than 7 days back ({@code backCt}). The windowed
 * result is serialized as snake_case JSON and sent to the {@code dws_user_user_login_window}
 * Doris sink.
 *
 * <p>Data flow: web/app -> Nginx -> log server (file) -> Flume -> Kafka (ODS) -> FlinkApp
 * -> Kafka (DWD) -> FlinkApp -> Doris
 *
 * <p>Programs: Mock -> Nginx (log server, file) -> f1.sh -> Kafka (ZK) -> DwdTrafficBaseLogSplit
 * -> Kafka (ZK) -> Dws04_UserUserLoginWindow -> Doris
 */
public class Dws04_UserUserLoginWindow {

    /** Number of milliseconds in one day, used to turn a timestamp difference into whole days. */
    private static final long MILLIS_PER_DAY = 24 * 3600 * 1000L;

    /**
     * Shared fastjson serialization config that writes bean properties in snake_case.
     * Built once per JVM instead of once per record: a fresh {@link SerializeConfig} starts with an
     * empty serializer cache, so creating one per element repeats the serializer setup every time.
     */
    private static final SerializeConfig SNAKE_CASE_CONFIG = new SerializeConfig();

    static {
        SNAKE_CASE_CONFIG.propertyNamingStrategy = PropertyNamingStrategy.SnakeCase;
    }

    /**
     * Builds and runs the streaming pipeline.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the Flink job fails to build or execute
     */
    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        //1.1 Enable checkpointing
        env.enableCheckpointing(10000L);
        CheckpointConfig checkpointConfig = env.getCheckpointConfig();
        checkpointConfig.setCheckpointTimeout(20000L);
        checkpointConfig.setCheckpointStorage("hdfs://hadoop102:8020/flink-ck");
        checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConfig.setMinPauseBetweenCheckpoints(5000L);
        // NOTE(review): Flink's docs say concurrent checkpoints cannot be combined with a minimum
        // pause between checkpoints — confirm which of the two settings is actually intended.
        checkpointConfig.setMaxConcurrentCheckpoints(2);
        // Original note: without an explicit strategy the default restart attempt count is Integer.MAX_VALUE.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 5000L));
        env.setStateBackend(new HashMapStateBackend());

        // User name used for HDFS access (checkpoint storage above).
        System.setProperty("HADOOP_USER_NAME", "atguigu");

        //2. Read the DWD page-log topic from Kafka. No watermarks are generated at the source;
        //   timestamps and watermarks are assigned in step 6 on the converted beans.
        // NOTE(review): the consumer group id looks copied from another job — confirm it is not
        // shared with a different consumer of the same topic.
        DataStreamSource<String> kafkaDS = env.fromSource(KafkaUtil.getKafkaSource(Constant.TOPIC_DWD_TRAFFIC_PAGE, "home_detail_view_230315"),
                WatermarkStrategy.noWatermarks(),
                "kafka-source");

        //3. Filter and convert to JSON objects
        SingleOutputStreamOperator<JSONObject> jsonObjDS = kafkaDS.flatMap(new FlatMapFunction<String, JSONObject>() {
            @Override
            public void flatMap(String value, Collector<JSONObject> out) throws Exception {
                // Nothing to parse for null or empty payloads.
                if (value == null || value.isEmpty()) {
                    return;
                }
                try {
                    JSONObject jsonObject = JSON.parseObject(value);
                    Object common = jsonObject == null ? null : jsonObject.get("common");
                    Object page = jsonObject == null ? null : jsonObject.get("page");

                    // Downstream operators dereference common, page and ts unconditionally, so a
                    // record missing any of them is reported as dirty data here instead of failing
                    // the job with a NullPointerException later.
                    if (!(common instanceof JSONObject)
                            || !(page instanceof JSONObject)
                            || jsonObject.getLong("ts") == null) {
                        System.out.println("脏数据：" + value);
                        return;
                    }

                    String uid = ((JSONObject) common).getString("uid");
                    String lastPageId = ((JSONObject) page).getString("last_page_id");
                    // Keep records of identified users whose page has no predecessor or whose
                    // predecessor is the login page.
                    if (uid != null && (lastPageId == null || "login".equals(lastPageId))) {
                        out.collect(jsonObject);
                    }
                } catch (JSONException e) {
                    System.out.println("脏数据：" + value);
                }
            }
        });

        //4. Key by uid (common is guaranteed non-null by step 3)
        KeyedStream<JSONObject, String> keyedStream = jsonObjDS.keyBy(json -> json.getJSONObject("common").getString("uid"));

        //5. De-duplicate per user and day, convert to JavaBean
        SingleOutputStreamOperator<UserLoginBean> userLoginBeanDS = keyedStream.flatMap(new RichFlatMapFunction<JSONObject, UserLoginBean>() {

            /** Date string of the most recent login day seen for the current key (uid). */
            private ValueState<String> lastVisitDtState;

            @Override
            public void open(Configuration parameters) throws Exception {
                lastVisitDtState = getRuntimeContext().getState(new ValueStateDescriptor<String>("last-visit-state", String.class));
            }

            @Override
            public void flatMap(JSONObject value, Collector<UserLoginBean> out) throws Exception {

                // Gather the stored login day and the day of the current record
                String lastDt = lastVisitDtState.value();
                Long ts = value.getLong("ts");
                String curDt = DateFormatUtil.toDate(ts);

                long uv = 0L;
                long backUv = 0L;

                if (lastDt == null) {
                    // First login ever recorded for this user
                    uv = 1L;
                    lastVisitDtState.update(curDt);
                } else if (!lastDt.equals(curDt)) {
                    // First login on a different day than the stored one
                    uv = 1L;
                    lastVisitDtState.update(curDt);
                    // Returning user when the gap to the previous login day exceeds 7 whole days
                    if ((DateFormatUtil.toTs(curDt) - DateFormatUtil.toTs(lastDt)) / MILLIS_PER_DAY > 7L) {
                        backUv = 1L;
                    }
                }

                // Only the first login of a day contributes to the counts
                if (uv == 1L) {
                    out.collect(new UserLoginBean("", "", curDt, backUv, uv, ts));
                }
            }
        });

        //6. Assign timestamps/watermarks, then window and aggregate
        SingleOutputStreamOperator<UserLoginBean> resultDS = userLoginBeanDS.assignTimestampsAndWatermarks(WatermarkStrategy.<UserLoginBean>forBoundedOutOfOrderness(Duration.ofSeconds(2)).withTimestampAssigner(new SerializableTimestampAssigner<UserLoginBean>() {
                    @Override
                    public long extractTimestamp(UserLoginBean element, long recordTimestamp) {
                        return element.getTs();
                    }
                })).windowAll(TumblingEventTimeWindows.of(Time.seconds(10)))
                .reduce(new ReduceFunction<UserLoginBean>() {
                    @Override
                    public UserLoginBean reduce(UserLoginBean value1, UserLoginBean value2) throws Exception {
                        // Accumulate both counters into the first bean
                        value1.setUuCt(value1.getUuCt() + value2.getUuCt());
                        value1.setBackCt(value1.getBackCt() + value2.getBackCt());
                        return value1;
                    }
                }, new AllWindowFunction<UserLoginBean, UserLoginBean, TimeWindow>() {
                    @Override
                    public void apply(TimeWindow window, Iterable<UserLoginBean> values, Collector<UserLoginBean> out) throws Exception {
                        // The iterable holds the single pre-aggregated bean; stamp it with the window bounds
                        UserLoginBean next = values.iterator().next();
                        next.setEdt(DateFormatUtil.toYmdHms(window.getEnd()));
                        next.setStt(DateFormatUtil.toYmdHms(window.getStart()));
                        out.collect(next);
                    }
                });

        //7. Write the result to Doris
        resultDS.print(">>>>>>>>");
        resultDS.map(bean -> JSON.toJSONString(bean, SNAKE_CASE_CONFIG))  // property names in snake_case
                .sinkTo(DorisUtil.getDorisSink("dws_user_user_login_window"));

        //8. Launch the job
        env.execute("Dws04_UserUserLoginWindow");

    }

}
